clear
clear global
currentfolder='C:\Users\tbold\Dropbox\Tessa and Tobi\JEEA Replication';    
filetosave='evidence_of_groups_vill_P1_Mar21'; % name of workspace saved
village=2;
datafile='perm_vill_2';
outputpath=[currentfolder '\Data\Output'];
programpath=[currentfolder '\Programmes'];
tablepath=[currentfolder '\Latex'];
% 
cd(outputpath)
eval(['load ' datafile '.out;']);
eval(['HID=' datafile '(:,1);']);
eval(['YEAR=' datafile '(:,2);']);
eval(['DCONS=' datafile '(:,3);']);
eval(['DCONSRES=' datafile '(:,4);']);
cd(programpath)
DATA=[HID YEAR DCONS DCONSRES];

CONSDATA=DATA(:,4);
years=6;
perms=10000;
%this is for village 2
households=37;

for hh1=1:households
    for hh2=1:households
        index1=(hh1-1)*years;
        index2=(hh2-1)*years;
        X1=CONSDATA(index1+1:index1+years,1);
        X2=CONSDATA(index2+1:index2+years,1);
        [a,b]=corrcoef(X1,X2);
        CONS_CORR(hh1,hh2)=a(2,1);
    end
end

CONS_CORR_RESHAPE=reshape(CONS_CORR,37*37,1);




%%%%compare this to the correlations you get in the model
load('permmodel2.mat')
DC_LOG_COND_ALL=DC_LOG_COND2;
DC_LOG_ALL=DC_LOG_2;
RES_COND_ALL=RES_COND2;
load('permmodel2_village_RS.mat')
DC_LOG_COND_ALL(1:size(DC_LOG_COND2,1),:,179,:)=DC_LOG_COND2(:,:,:);
DC_LOG_ALL(1:size(DC_LOG_2,1),:,179,:)=DC_LOG_2(:,:,:);
RES_COND_ALL(1:size(RES_COND2,1),:,179,:)=RES_COND2(:,:,:);

clear DC_LOG_COND2 RES_COND_2 DC_LOG_2
%DC_LOG_COND_ALL=RES_COND_ALL;

tic
vss_high_inddev=37-1;                 % the number of total village members is vss_high_inddev+1 under individual deviations
vss_high_coaldev=37-1;

%%%%make a MEMBERSHIP matrix for the census
census=180;
groupsizeset=[4 vss_high_inddev+1 census]; %

%%%record who would be a member in the same group for different partitions
%%%of the village in the model
for gg=1:length(groupsizeset)
    groupsize=groupsizeset(gg);
    Nvill=ceil((vss_high_inddev+1)/(groupsize))*6; %
    for k=1:Nvill
        PARTITION(k)=(k-1)*groupsize+groupsize;
    end
    for i=1:census
        for j=1:census
        rri=find(i<=PARTITION);
        rrj=find(j<=PARTITION);
        if length(rri)==length(rrj)
           MEMBERCENSUS(i,j,gg)=1;
        else
           MEMBERCENSUS(i,j,gg)=0;
        end
        end
    end
    clear PARTITION 
end

%%%%sampling the whole village
for Qbeta=1:2
for gg=1:length(groupsizeset)
groupsize=groupsizeset(gg);
vss=groupsize-1;
census=180;

AA=[1:census]';

sample=census;
%tstart=randi(size(DC_LOG_COND_ALL,2)-10);


        X1=DC_LOG_COND_ALL(AA(1:sample),:,vss,Qbeta)';
        X2=DC_LOG_COND_ALL(AA(1:sample),:,vss,Qbeta)';
        CONS_CORR_MODEL_PERFECT(1:sample,1:sample,gg,Qbeta)=corr(X1,X2);
end

end


%distinguishing
%between correlations for households that share membership of a
%risk-sharing group, and those who are not connected. 
for Qbeta=1:2;
count=0
for gg=[1 2 3] %this is group size 4, 37 and 180
    count=count+1;

MEMBERCORRS=[];
NONMEMBERCORRS=[];

for i=1:census
    for j=1:census
    index1=i;
    index2=j;
    if MEMBERCENSUS(index1,index2,gg)==1 & index1~=index2
    MEMBERCORRS=[MEMBERCORRS ; CONS_CORR_MODEL_PERFECT(i,j,gg,Qbeta)];
    end
    if MEMBERCENSUS(index1,index2,gg)==0 
    NONMEMBERCORRS=[NONMEMBERCORRS ; CONS_CORR_MODEL_PERFECT(i ,j,gg,Qbeta)];
    end
    end
end
MEANMEMBERCORRS(count,Qbeta)=mean(MEMBERCORRS);
MEANNONMEMBERCORRS(count,Qbeta)=mean(NONMEMBERCORRS);

VARMEMBERCORRS(count,Qbeta)=var(MEMBERCORRS);
VARNONMEMBERCORRS(count,Qbeta)=var(NONMEMBERCORRS);

ALLCORRS(:,count,Qbeta)=[NONMEMBERCORRS; MEMBERCORRS];
SHAREMEMBERS_CENSUS(count,Qbeta)=length(MEMBERCORRS)/length([NONMEMBERCORRS; MEMBERCORRS]);
end
end


pts=[-1:0.001:1];
for Qbeta=1:2
for count=1:size(ALLCORRS,2)
ALLCORRS_RS=squeeze(ALLCORRS(:,count,Qbeta));
ALLCORRS_SORT=sort(ALLCORRS_RS);
MEAN_ALLCORRS_SORT(:,count,Qbeta)=ALLCORRS_SORT;
[f,xi]=ksdensity(MEAN_ALLCORRS_SORT(:,count,Qbeta),pts,'Function','cdf');
CDF_ALLCORRS(:,count,Qbeta)=f;
[f,xi]=ksdensity(MEAN_ALLCORRS_SORT(:,count,Qbeta),pts);
PDF_ALLCORRS(:,count,Qbeta)=f;
end
end

CONS_CORR_SORT=sort(CONS_CORR_RESHAPE);
CONS_CORR_SORT(end:-1:end-36)=1; %remove the i,i correlations
[f,xi]=ksdensity(CONS_CORR_SORT,pts,'Function','cdf');
CDF_CORR=f;
[f,xi]=ksdensity(CONS_CORR_SORT,pts);
PDF_CORR=f;

%%%PDFs (just for illustration, automated below)
subplot(2,1,1)
plot(pts,PDF_ALLCORRS(:,[1 2 3],1))

subplot(2,1,2)
plot(pts,PDF_ALLCORRS(:,[1 2 3],2))

PDF_ALLCORRS_CENSUS=PDF_ALLCORRS;


%%%%%and sampling a short panel from the long census
ALLCORRS=[];
for Qbeta=1:2

for perm=1:10000
for gg=1:length(groupsizeset)
groupsize=groupsizeset(gg);
vss=groupsize-1;
census=180;
if perm==1
AA=[1:census]';
else
rng(perm,'twister');    
AA=randperm(census)';
end
sample=vss_high_inddev+1;
rng(perm,'twister'); 
tstart=randi(size(DC_LOG_COND_ALL,2)-10);


        X1=DC_LOG_COND_ALL(AA(1:sample),tstart:tstart+6,vss,Qbeta)';
        X2=DC_LOG_COND_ALL(AA(1:sample),tstart:tstart+6,vss,Qbeta)';
        CONS_CORR_MODEL_SMALLTSMALLN(1:sample,1:sample,gg,Qbeta,perm)=corr(X1,X2);
        SAMPLE(:,gg,Qbeta,perm)=AA(1:sample);
              

end

end
end



for Qbeta=1:2;
count=0
for gg=[1 2 3] %
    count=count+1;
for perm=1:10000
MEMBERCORRS=[];
MEMBERSINDEX=[];
NONMEMBERCORRS=[];
NONMEMBERSINDEX=[];

for i=1:37
    for j=1:37
    index1=SAMPLE(i,gg,Qbeta,perm);
    index2=SAMPLE(j,gg,Qbeta,perm);
    if MEMBERCENSUS(index1,index2,gg)==1 & index1~=index2
    MEMBERCORRS=[MEMBERCORRS ; CONS_CORR_MODEL_SMALLTSMALLN(i,j,gg,Qbeta,perm)];
    MEMBERSINDEX=[MEMBERSINDEX; sub2ind([37 37], i ,j)];
    end
    if MEMBERCENSUS(index1,index2,gg)==0 
    NONMEMBERCORRS=[NONMEMBERCORRS ; CONS_CORR_MODEL_SMALLTSMALLN(i ,j,gg,Qbeta,perm)];
    NONMEMBERSINDEX=[NONMEMBERSINDEX; sub2ind([37 37],i ,j)];
    end
    end
end
ALLCORRS(:,count,Qbeta,perm)=[NONMEMBERCORRS; MEMBERCORRS];
SHAREMEMBERS_SAMPLE(count,Qbeta)=length(MEMBERCORRS)/length([NONMEMBERCORRS; MEMBERCORRS]);

gg

MEANCORR(1,gg,Qbeta)=mean(NONMEMBERCORRS);
MEANCORR(2,gg,Qbeta)=mean(MEMBERCORRS);
MEANCORR(3,gg,Qbeta)=mean([NONMEMBERCORRS ;MEMBERCORRS]);

VARCORR(1,gg,Qbeta)=var(NONMEMBERCORRS);
VARCORR(2,gg,Qbeta)=var(MEMBERCORRS);
VARCORR(3,gg,Qbeta)=var([NONMEMBERCORRS ;MEMBERCORRS]);

end
end
end

pts=[-1:0.001:1];
MEAN_ALLCORRS_SORT=[];
for Qbeta=1:2
for count=1:size(ALLCORRS,2)
ALLCORRS_RS=squeeze(ALLCORRS(:,count,Qbeta,:));
ALLCORRS_SORT=sort(ALLCORRS_RS);
MEAN_ALLCORRS_SORT(:,count,Qbeta)=mean(ALLCORRS_SORT,2);
[f,xi]=ksdensity(MEAN_ALLCORRS_SORT(:,count,Qbeta),pts,'Function','cdf');
CDF_ALLCORRS(:,count,Qbeta)=f;
[f,xi]=ksdensity(MEAN_ALLCORRS_SORT(:,count,Qbeta),pts);
PDF_ALLCORRS(:,count,Qbeta)=f;
end
end

CONS_CORR_SORT=sort(CONS_CORR_RESHAPE);
CONS_CORR_SORT(end:-1:end-36)=1; %remove the i,i correlations
[f,xi]=ksdensity(CONS_CORR_SORT,pts);
PDF_CORR=f;

%%%PDF, just for illustration
subplot(2,1,1)
plot(pts,PDF_CORR,'--')
hold on
plot(pts,PDF_ALLCORRS(:,[1 2 3],1))

subplot(2,1,2)
plot(pts,PDF_CORR,'--')
hold on
plot(pts,PDF_ALLCORRS(:,[1 2 3],2))


%%%%and automatize the picture
X1=pts;
YMatrix1=[PDF_ALLCORRS_CENSUS(:,[1 2 3],1)];
YMatrix2=[PDF_CORR' PDF_ALLCORRS(:,[1 2 3],1)];
YMatrix3=[PDF_ALLCORRS_CENSUS(:,[1 2 3],2)];
YMatrix4=[PDF_CORR' PDF_ALLCORRS(:,[1 2 3],2)];
%%%%this creates Figure 5
correlations_picture(X1, YMatrix1, YMatrix2, YMatrix3, YMatrix4)
%for best result, first manually resize the figure to screen size
cd(tablepath)
saveas(gcf,'correlations_all_08_06_2021','epsc')